library(stringr)
library(data.table)
library(lubridate)
library(tictoc)
library(dplyr)
library(dtplyr)
library(zoo)
library(openxlsx)
library(taRifx)
library(haven)
library(sandwich)
library(lmtest)
library(sqldf)
library(ggplot2)
library(stringr)
library(data.table)
library(lubridate)
library(tictoc)
library(dplyr)
library(dtplyr)
library(zoo)
#library(robustHD)
#library(RColorBrewer)

rm(list=ls())
options(max.print=999999)



# Sets the current path
current_path <- "XXX"
setwd(current_path)


#setwd("~/Corporate_Bond_TRACE/data/TRACE_summarystatistics_combine_most_recent")

tmp1<-fread("SumStat_standard_TRACE.csv")
tmp2<-fread("SumStat_EOD_TRACE.csv")

print(nrow(tmp1))
print(nrow(tmp2))
tmp1<-tmp1[trd_exctn_dt<=20200331]
tmp2<-tmp2[trd_exctn_dt_s>="2020-03-31"]
print(nrow(tmp1))
print(nrow(tmp2))

tmp<-rbind(tmp1,tmp2,fill=TRUE)
setcolorder(tmp,c("trd_exctn_dt","trd_exctn_dt_s"))
tmp<-as.data.table(tmp)

tmp[is.na(trd_exctn_dt),`:=`(trd_exctn_dt=as.integer(paste0(substr(trd_exctn_dt_s,1,4),
                                                            substr(trd_exctn_dt_s,6,7),
                                                            substr(trd_exctn_dt_s,9,10))))]

tmp[,`:=`(DATE=paste0(substr(as.character(trd_exctn_dt),1,4),"-",
                      substr(as.character(trd_exctn_dt),5,6),"-",
                      substr(as.character(trd_exctn_dt),7,8)))]
tmp[,`:=`(trd_exctn_dt_num=trd_exctn_dt)]
tmp[,`:=`(trd_exctn_dt=NULL)]
tmp[,`:=`(trd_exctn_dt=DATE)]
setcolorder(tmp,c("DATE","trd_exctn_dt","trd_exctn_dt_num"))

tmp[,`:=`(weekday=weekdays(as.Date(DATE)))]
tmp<-tmp[!(weekday=="Saturday"|weekday=="Sunday")]

tmp<-tmp[order(DATE)]

tmp<-tmp[!(trd_exctn_dt %in% c("2020-01-04","2020-01-05","2020-01-11","2020-01-12","2020-01-18","2020-01-19","2020-01-25","2020-01-26",
                               "2020-02-01","2020-02-02","2020-02-08","2020-02-09","2020-02-15","2020-02-16","2020-02-22","2020-02-23",
                               "2020-02-29","2020-03-01","2020-03-07","2020-03-08","2020-03-14","2020-03-15","2020-03-21","2020-03-22",
                               "2020-03-28","2020-03-29",
                               "2020-01-20","2020-02-17",
                               "2020-04-10","2020-05-25","2020-07-03","2020-07-04","2020-07-02",
                               "2020-08-13"))]
##############################################
###Comments:
###-On 2020-07-02: Bond traders not only get all of Friday off, but the bond markets will shut down early on Thursday, July 2, closing up shop at 2 p.m.
###-On 2020-08-13: the reason to get rid of it is due to the extreme values.

setcolorder(tmp,c("DATE","trd_exctn_dt"))

tmp<-tmp[DATE>="2020-01-01"]
# X_axis_breaks<-c("2020-02-28","2020-03-09","2020-03-16","2020-03-20","2020-03-23","2020-03-24",
#                  "2020-04-09","2020-04-17","2020-04-21","2020-04-29")
# X_axis_labels<-c("Feb 28","Mar 09","Mar 16","Mar 20","Mar 23","Mar 24",
#                  "Apr 09","Apr 17","Apr 21","Apr 29")

X_axis_breaks<-c("2020-02-19","2020-03-05","2020-03-09","2020-03-18","2020-03-23","2020-03-30",
                 "2020-04-09","2020-04-17","2020-04-21","2020-04-29")
X_axis_labels<-c("Feb 19","Mar 05","Mar 09","Mar 18","Mar 23","Mar 30",
                 "Apr 09","Apr 17","Apr 21","Apr 29")

###for overlapping date 2020-03-31, take simple average of all summary statistics
tmp[,`:=`(count_seq=sequence(.N)),by=c("DATE")]
nrow(tmp[count_seq==2])
overlap_date<-as.character(tmp[count_seq>1,c("DATE"),with=FALSE])
setcolorder(tmp,c("weekday","count_seq"))

for (v in c(colnames(tmp)[7:length(tmp)])){
  tmp[count_seq==1&DATE==overlap_date,(v):=
        colMeans(tmp[DATE==overlap_date,c(v),with=FALSE],na.rm = TRUE)]  
}
print(nrow(tmp))
tmp<-tmp[!(count_seq>1)]
print(nrow(tmp))

###adjust for the six originally used colnames
# COLNAMES_6<-c(setdiff(c(colnames(tmp1)),c(colnames(tmp2))))[!(c(setdiff(c(colnames(tmp1)),c(colnames(tmp2)))) %in% c("trd_exctn_dt"))]
# newnames_COLNAMES_6<-c("wavemean_spread_CH_bondday_bps","wavemean_spread_CH_bondday_IG_bps","wavemean_spread_CH_bondday_HY_bps",
#                        "mean_wave_daily_irc_byvol_all_bps","mean_wave_daily_irc_byvol_IG_bps","mean_wave_daily_irc_byvol_HY_bps")
# print(COLNAMES_6)
# 
# for (i in c(1:6)){
#   var<-as.character(COLNAMES_6[i])
#   tmp[,(paste0(var)):=get(newnames_COLNAMES_6[i])]
# }

# for (i in c(1:6)){
#   var<-as.character(COLNAMES_6[i])
#   tmp[is.na(get(var)),(paste0(var)):=get(newnames_COLNAMES_6[i])]
# }

###re-calculate cumulative inventory change starting from "2020-01-01"
tmp[,`:=`(cumulative_inven_change_all=cumsum(daily_netinflow_vol),
          cumulative_inven_change_IG=cumsum(daily_netinflow_vol_IG),
          cumulative_inven_change_HY=cumsum(daily_netinflow_vol_HY),
          cumulative_inven_change_TTM5yless=cumsum(daily_netinflow_vol_TTM5yless),
          cumulative_inven_change_TTM5ymore=cumsum(daily_netinflow_vol_TTM5ymore),
          
          cumulative_inven_change_US=cumsum(daily_netinflow_vol_US),
          cumulative_inven_change_nonUS=cumsum(daily_netinflow_vol_nonUS),
          cumulative_inven_change_Finance=cumsum(daily_netinflow_vol_Finance),
          cumulative_inven_change_Utility=cumsum(daily_netinflow_vol_Utility),
          cumulative_inven_change_Transportation=cumsum(daily_netinflow_vol_Transportation),
          cumulative_inven_change_OilGas=cumsum(daily_netinflow_vol_OilGas),
          
          cumulative_inven_change_IG_TTM5yless=cumsum(daily_netinflow_vol_IG_TTM5yless),
          cumulative_inven_change_IG_TTM5ymore=cumsum(daily_netinflow_vol_IG_TTM5ymore),
          cumulative_inven_change_HY_TTM5yless=cumsum(daily_netinflow_vol_HY_TTM5yless),
          cumulative_inven_change_HY_TTM5ymore=cumsum(daily_netinflow_vol_HY_TTM5ymore),
          
          cumulative_inven_change_Eligible=cumsum(daily_netinflow_vol_Eligible))]

fwrite(tmp,"SumStats_combine_TRACE_standard_EOD_Jan_June.csv")





